import requests
from bs4 import BeautifulSoup
import pymongo

# Scrape the Douban Top 250 movie list (10 pages x 25 entries) and store one
# document per movie in the local MongoDB collection movieRecommend.movies.
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["movieRecommend"]
mycol = mydb["movies"]
# Offset of the first entry on the page currently being fetched.
start = 0

BASE_URL = "http://movie.douban.com/top250?start="
# Browser-like User-Agent sent with every request.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
for i in range(0, 10):
    # A timeout keeps a stalled connection from hanging the script forever;
    # raise_for_status stops on an HTTP error instead of silently parsing an
    # error page that contains no movie entries.
    html = requests.get(BASE_URL + str(start), headers=header, timeout=10)
    html.raise_for_status()
    html.encoding = 'utf-8'
    soup = BeautifulSoup(html.text, 'html.parser')
    for count, item in enumerate(soup.find_all('div', 'item'), 1):
        # `start` still holds this page's offset here, so start + count is
        # the 1-based position of the record in the overall list.
        print("正在保存第" + str(start + count) + "条数据")
        picture = item.find('div', class_='pic').a.img['src']
        title = item.find('span', class_='title').get_text()
        star = item.find('span', class_='rating_num').get_text()

        # The first <p> inside div.bd is read as: an empty first line, then
        # the director/cast line, then a "year / country / genres" line.
        backgroud = item.find('div', class_='bd').p.get_text()
        lines = backgroud.split('\n')
        director = lines[1].strip()
        parts = [part.strip() for part in lines[2].split('/')]
        if len(parts) < 3:
            raise ValueError(
                "unexpected info line for " + title + ": " + lines[2].strip()
            )
        # Some entries list several release years separated by '/', so take
        # country and genres from the right-hand end instead of fixed
        # positions; the first field is kept as the year.
        date = parts[0]
        country = parts[-2]
        category = parts[-1].split(' ')

        # The one-line quote (span.inq) is optional; store "" when absent.
        cntEle = item.find('div', class_='bd').find('span', class_='inq')
        content = cntEle.get_text() if cntEle else ""

        col = {
            "picture": picture,
            "title": title,
            "director": director,
            "year": date,
            "star": star,
            "country": country,
            "category": category,
            "content": content,
        }
        mycol.insert_one(col)
    # Advance to the next page only after this page's records are saved.
    start += 25

# Required dependencies
# 1. BeautifulSoup (pip install beautifulsoup4)
# 2. pymongo (pip install pymongo)
# 3. requests (pip install requests)